import pandas as pd
import itertools
import plotly.graph_objects as go
# Load the laureate table from the CSV file in the working directory.
data = pd.read_csv('complete.csv')
# Notebook-style preview of the first rows (has no effect when run as a script).
data.head()
| awardYear | category | categoryFullName | sortOrder | portion | prizeAmount | prizeAmountAdjusted | dateAwarded | prizeStatus | motivation | ... | org_founded_country | org_founded_countryNow | org_founded_locationString | ind_or_org | residence_1 | residence_2 | affiliation_1 | affiliation_2 | affiliation_3 | affiliation_4 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2001 | Economic Sciences | The Sveriges Riksbank Prize in Economic Scienc... | 2 | 1/3 | 10000000 | 12295082 | 2001-10-10 | received | for their analyses of markets with asymmetric ... | ... | NaN | NaN | NaN | Individual | NaN | NaN | Stanford University, Stanford, CA, USA | NaN | NaN | NaN |
| 1 | 1975 | Physics | The Nobel Prize in Physics | 1 | 1/3 | 630000 | 3404179 | 1975-10-17 | received | for the discovery of the connection between co... | ... | NaN | NaN | NaN | Individual | NaN | NaN | Niels Bohr Institute, Copenhagen, Denmark | NaN | NaN | NaN |
| 2 | 2004 | Chemistry | The Nobel Prize in Chemistry | 1 | 1/3 | 10000000 | 11762861 | 2004-10-06 | received | for the discovery of ubiquitin-mediated protei... | ... | NaN | NaN | NaN | Individual | NaN | NaN | Technion - Israel Institute of Technology, Hai... | NaN | NaN | NaN |
| 3 | 1982 | Chemistry | The Nobel Prize in Chemistry | 1 | 1 | 1150000 | 3102518 | 1982-10-18 | received | for his development of crystallographic electr... | ... | NaN | NaN | NaN | Individual | NaN | NaN | MRC Laboratory of Molecular Biology, Cambridge... | NaN | NaN | NaN |
| 4 | 1979 | Physics | The Nobel Prize in Physics | 2 | 1/3 | 800000 | 2988048 | 1979-10-15 | received | for their contributions to the theory of the u... | ... | NaN | NaN | NaN | Individual | NaN | NaN | International Centre for Theoretical Physics, ... | Imperial College, London, United Kingdom | NaN | NaN |
5 rows × 52 columns
# Number of rows in `data` for every (awardYear, category) pair that occurs.
awards_in_year = (
    data.groupby(['awardYear', 'category'])
    .size()
    .to_frame('totalNumberOfAwards')
    .reset_index()
)

# Build the complete year x category grid so that pairs with no rows in
# `data` still get an entry (NaN after the reindex).
categories = awards_in_year['category'].unique()
year_min = awards_in_year['awardYear'].min()
year_max = awards_in_year['awardYear'].max()
years = list(range(year_min, year_max + 1))
years_categories = itertools.product(years, categories)
new_index = pd.MultiIndex.from_tuples(years_categories, names=['awardYear', 'category'])
awards_in_year = awards_in_year.set_index(['awardYear', 'category']).reindex(new_index)

# Running total per category. Treating a missing (year, category) pair as
# zero awards *before* accumulating yields the same values as accumulating
# first and forward-filling the gaps afterwards, but avoids the deprecated
# `fillna(method='ffill')` / `GroupBy.fillna` calls.
awards_by_year = (
    awards_in_year.fillna(0)
    .groupby(level='category')
    .cumsum()
    .reset_index()
)
# Notebook-style display of the result (has no effect when run as a script).
awards_by_year
| awardYear | category | totalNumberOfAwards | |
|---|---|---|---|
| 0 | 1901 | Chemistry | 1.0 |
| 1 | 1901 | Literature | 1.0 |
| 2 | 1901 | Peace | 2.0 |
| 3 | 1901 | Physics | 1.0 |
| 4 | 1901 | Physiology or Medicine | 1.0 |
| ... | ... | ... | ... |
| 709 | 2019 | Literature | 116.0 |
| 710 | 2019 | Peace | 134.0 |
| 711 | 2019 | Physics | 213.0 |
| 712 | 2019 | Physiology or Medicine | 219.0 |
| 713 | 2019 | Economic Sciences | 84.0 |
714 rows × 3 columns
# One fixed bar colour per category, so a category keeps its colour while the
# bars are reordered from frame to frame.
CATEGORY_COLORS = {
    'Chemistry': '#FD0100',
    'Literature': '#F76915',
    'Peace': '#EEDE04',
    'Physics': '#A0D636',
    'Physiology or Medicine': '#2FA236',
    'Economic Sciences': '#333ED4',
}
awards_by_year['color'] = awards_by_year['category'].map(CATEGORY_COLORS)
def frames_animation(data, title):
    """Create one animation frame per year between the first and last year.

    Args:
        data: DataFrame providing the 'awardYear', 'category',
            'totalNumberOfAwards' and 'color' columns.
        title: Text shown before the year in each frame's title.

    Returns:
        A list of ``go.Frame`` objects, one for every year from the smallest
        to the largest 'awardYear' (inclusive), each holding a single bar
        trace sorted by 'totalNumberOfAwards' in descending order.
    """
    first_year = data['awardYear'].min()
    last_year = data['awardYear'].max()

    def build_frame(year):
        # Rows of this year, largest running total first.
        ranked = data[data['awardYear'] == year].sort_values(
            by='totalNumberOfAwards', ascending=False
        )
        bars = go.Bar(
            x=ranked['category'],
            y=ranked['totalNumberOfAwards'],
            marker_color=ranked['color'],
            hoverinfo='none',
            textposition='outside',
            texttemplate='%{y}',
            cliponaxis=False,
        )
        heading = go.layout.Title(
            text=f'{title} {year} <br><sup>Sorted in descending order</sup>',
            xref='paper',
            x=0,
        )
        frame_layout = go.Layout(
            font=dict(size=14),
            xaxis=dict(showline=True, visible=True),
            yaxis=dict(showline=True, visible=True),
            bargap=0.15,
            title=heading,
        )
        return go.Frame(data=[bars], layout=frame_layout)

    return [build_frame(year) for year in range(first_year, last_year + 1)]
def bar_race_plot(df, title, list_of_frames, y_range=(0, 300)):
    """Assemble the animated bar-chart figure with Play/Stop buttons.

    The figure initially shows the earliest year in ``df``; the supplied
    frames are attached for the animation.

    Args:
        df: DataFrame providing the 'awardYear', 'category',
            'totalNumberOfAwards' and 'color' columns.
        title: Text shown before the year in the figure title.
        list_of_frames: Iterable of ``go.Frame`` objects to animate through.
        y_range: ``(lower, upper)`` bounds of the y axis. Defaults to
            ``(0, 300)``.

    Returns:
        The configured ``go.Figure``.
    """
    min_year = df['awardYear'].min()
    # Sort the starting bars the same way the animation frames are sorted, so
    # the initial view matches its "Sorted in descending order" subtitle.
    initial_df = df[df['awardYear'] == min_year].sort_values(
        by='totalNumberOfAwards', ascending=False
    )

    initial_bars = go.Bar(
        x=initial_df['category'],
        y=initial_df['totalNumberOfAwards'],
        marker_color=initial_df['color'],
        hoverinfo='none',
        textposition='outside',
        texttemplate='%{y}',
        cliponaxis=False,
    )

    play_button = dict(
        label="Play",
        method="animate",
        args=[
            None,
            {"frame": {"duration": 200, "redraw": True},
             "fromcurrent": True},
        ],
    )
    # Animating to [None] immediately with zero duration is the usual Plotly
    # way of halting an animation that is currently playing.
    stop_button = dict(
        label="Stop",
        method="animate",
        args=[
            [None],
            {"frame": {"duration": 0, "redraw": False},
             "mode": "immediate", "transition": {"duration": 0}},
        ],
    )

    layout = go.Layout(
        font={'size': 14},
        xaxis={'showline': True, 'visible': True},
        yaxis={'showline': True, 'visible': True, 'range': y_range},
        bargap=0.15,
        title=go.layout.Title(
            text=f'{title} {min_year} <br><sup>Sorted in descending order</sup>',
            xref='paper',
            x=0,
        ),
        updatemenus=[dict(type="buttons", buttons=[play_button, stop_button])],
    )

    fig = go.Figure(data=[initial_bars], layout=layout, frames=list(list_of_frames))
    fig.update_xaxes(tickangle=40)
    return fig
# Build the per-year frames, wrap them in the animated figure and render it.
title = 'Total Number of Nobel Prize Laureates by Category'
list_of_frames = frames_animation(data=awards_by_year, title=title)
fig = bar_race_plot(df=awards_by_year, title=title, list_of_frames=list_of_frames)
fig.show()